{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6155ff16-106d-4c1c-a2ec-5d45f0d3c219",
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = \"..\\data\\wzq-medical-cases.xlsx\" # The path of original medical cases\n",
    "output_file = \"..\\data\\output.xlsx\" # Define the path of the output Excel file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3253fcc8-391a-439d-a71b-ba5038c98a3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(file_path)\n",
    "texts = df['Symptom']\n",
    "print(texts[:10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a09e6d7-1e3a-4371-b992-19be5b34df6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from zhipuai import ZhipuAI\n",
    "client = ZhipuAI(api_key=\"zhipu_api_key\") # Fill in your own APIKey\n",
    "def extract(text):\n",
    "    response = client.chat.completions.create(\n",
    "        model=\"glm-3-turbo\",  # Fill in the entity type to be extracted\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": '''你是一位专业的医生，你的任务是将文本中的症状转换为更规范的语言表述，使其成为适合构建知识图谱的节点，将不同症状用逗号分隔，并注明对照关系，不要改动原意。\n",
    "            以下为示例：\n",
    "            Input:[‘头眩胀痛’, ‘咳嗽发热’, ‘鼻塞多涕’, ‘欲作呕恶’, ‘两手脉俱反关，弦劲而滑’]\n",
    "            Output:头晕，头胀痛，咳嗽，发热，鼻塞，流涕，恶心欲呕，脉反管，脉弦，脉滑'''},\n",
    "            {\"role\": \"user\", \"content\": '''将以下文本中的症状转换为规范的语言表述。\n",
    "            Input: ''' + text + '''\n",
    "            Output:'''}\n",
    "        ],\n",
    "    )\n",
    "    result = response.choices[0].message\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a718e59-449c-4ef2-9e8e-9aac62e9e88e",
   "metadata": {},
   "outputs": [],
   "source": [
    "with pd.ExcelWriter(output_file, engine='openpyxl', mode='w') as writer:\n",
    "    pd.DataFrame(columns=['Original symptom', 'Standardized symptom']).to_excel(writer, sheet_name='Sheet1', index=False)\n",
    "    columns = ['Original symptom', 'Standardized symptom']\n",
    "    n = 0\n",
    "    startrow = 1\n",
    "    \n",
    "    for text in texts:\n",
    "        n += 1\n",
    "        result = extract(text).content\n",
    "        fill_text = result if result else [float('NaN')]  # If there is no symptom information, fill in 'NaN'\n",
    "        df = pd.DataFrame([{columns[0]: text, columns[1]: fill_text}]) # Adjust the columns in the output Excel here\n",
    "        df.to_excel(writer, sheet_name='Sheet1', index=False, header=(n == 0), startrow=startrow)\n",
    "        startrow += 1\n",
    "        print(f\"Article {n}: Symptoms of medical cases have been standardized\")\n",
    "        \n",
    "print(f\"Data saved to {output_file}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d324af6-8e03-47d8-9bab-df92fa6318f5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "QAM",
   "language": "python",
   "name": "qam"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
